In the following section, we check whether there is a relationship between the physico-chemical properties of the proteins and the observed differences between the extraction methods.
The protein data sets are used here.
## NOTE(review): setwd() ties the script to one machine; it is kept because
## the relative file paths used below are resolved against this directory
setwd("~/GitLab/20211202_extraction_method/")

## Read the LFQ intensities from one sheet of the overview workbook and
## replace the assay by its log-transformed values.
##
## sheet: name of the sheet in `file` to read
## file:  path to the xlsx workbook
## returns the object created by maxQuant() with the transformed assay
load_log_lfq <- function(sheet, file = "proteinGroups_overview.xlsx") {
  se <- maxQuant(file, intensity = "LFQ", sheet = sheet, type = "xlsx")
  log_assay <- transformAssay(assay(se), method = "log")
  ## updateSE is accessed with `:::`, i.e. presumably not exported
  MatrixQCvis:::updateSE(se, assay = log_assay)
}

## load cells data set
cells <- load_log_lfq("cells")
## load fresh-frozen data set
ff <- load_log_lfq("Fresh-frozen")
## load FFPE data set
ffpe <- load_log_lfq("FFPE")
## load Serum-plasma
plasma_serum <- load_log_lfq("Serum-Plasma")

## columns of the annotation sheet that are kept
column_keep <- c("Sample_IDs", "condition", "LFQ")
## load the annotation (reading starts at row 2 of the sheet)
annot <- openxlsx::read.xlsx("proteinGroups_overview.xlsx",
  sheet = "annotation", startRow = 2)
annot <- annot[, column_keep]
## drop the samples flagged as "ignore" and those without a condition
annot <- annot[!is.na(annot$condition) & !annot$condition %in% "ignore", ]
## truncate the annotation for Cells
annot_cells <- annot[grepl("Cells", annot$condition), ]
## truncate the annotation for Fresh-frozen (matched via the sample ids)
annot_ff <- annot[grepl("powder_tissue_AFA_|_T", annot$Sample_IDs), ]
## truncate the annotation for FFPE (matched via the sample ids)
annot_ffpe <- annot[grepl("_FFPE", annot$Sample_IDs), ]
## truncate the annotation for Plasma and Serum
annot_plasma <- annot[grepl("Plasma", annot$condition), ]
annot_serum <- annot[grepl("Serum", annot$condition), ]
## Load the protein FASTA files and cut the sequence names.
library(Biostrings)
hs <- readAAStringSet("2021_03_30_Uniprot_homo_sapiens_canonical - Copy.fasta")
mm <- readAAStringSet("20190204_uniprot-mus+musculus-filtered-reviewed_yes.fasta")
## for cells, ffpe, plasma, serum = human
## keep the second "|"-separated field of each FASTA header as feature id
## (the accession for headers of the form db|accession|name)
names_hs <- strsplit(names(hs), split = "[|]")
names_hs <- vapply(names_hs, "[", character(1), 2)
## as.character() converts the whole set at once; unname() keeps the row
## names of the resulting data.frame automatic
hs <- data.frame(feature = names_hs, sequence = unname(as.character(hs)))
## for FF = mouse
names_mm <- strsplit(names(mm), split = "[|]")
names_mm <- vapply(names_mm, "[", character(1), 2)
mm <- data.frame(feature = names_mm, sequence = unname(as.character(mm)))
## Write the annotations and the sequences to the colData and rowData slots.
## Restrict a data set to the annotated samples and attach the sample
## annotation (colData) and the protein sequences (rowData).
##
## se:        object with a `name` column in its colData and a `feature`
##            column in its rowData
## annot:     sample annotation; `Sample_IDs` is joined against `name`
## sequences: data.frame with the columns `feature` and `sequence`
## returns `se`, subset to the samples in `annot`, with extended
## colData and rowData
annotate_se <- function(se, annot, sequences) {
  se <- se[, annot$Sample_IDs]
  cD <- as.data.frame(colData(se))
  rD <- as.data.frame(rowData(se))
  ## a feature may list several ";"-separated ids, keep the first one
  rD$feature_cut <- vapply(strsplit(rD$feature, split = ";"), "[",
    character(1), 1)
  cD_new <- left_join(cD, annot, by = c("name" = "Sample_IDs"))
  rD_new <- left_join(rD, sequences, by = c("feature_cut" = "feature"))
  ## duplicated join keys would multiply rows and misalign the data
  stopifnot(nrow(cD_new) == nrow(cD), nrow(rD_new) == nrow(rD))
  ## left_join() discards row names; restore the sample names so that
  ## colnames(se) survive the replacement of the colData
  rownames(cD_new) <- rownames(cD)
  ## use the setters instead of writing to the slot directly
  colData(se) <- DataFrame(cD_new)
  rowData(se) <- DataFrame(rD_new)
  se
}

## cells (human sequences)
annot_cells$Sample_IDs <- make.names(annot_cells$Sample_IDs)
cells <- annotate_se(cells, annot_cells, hs)
## fresh-frozen (mouse sequences)
annot_ff$Sample_IDs <- make.names(annot_ff$Sample_IDs)
ff <- annotate_se(ff, annot_ff, mm)
## FFPE (human sequences)
annot_ffpe$Sample_IDs <- make.names(annot_ffpe$Sample_IDs)
ffpe <- annotate_se(ffpe, annot_ffpe, hs)
## Plasma (human sequences)
annot_plasma$Sample_IDs <- make.names(annot_plasma$Sample_IDs)
plasma <- annotate_se(plasma_serum, annot_plasma, hs)
## Serum (human sequences)
annot_serum$Sample_IDs <- make.names(annot_serum$Sample_IDs)
serum <- annotate_se(plasma_serum, annot_serum, hs)
## For the differential expression analysis use in all cases the following scheme:
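- fit a linear model for each feature (using lmFit and method "ls"),
- compute estimated coefficients and standard errors for the given set of contrasts (using contrasts.fit),
- given the linear model fit from lmFit, compute moderated t-statistics, moderated F-statistic, and log-odds of differential expression by empirical Bayes moderation of the standard errors towards a global value (using eBayes).
## set parameters for differential expression (num only for display)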
## number of features (Inf = no limit); used for display only
num <- Inf
## p-value cut-off; presumably 1 keeps all features -- TODO confirm at the
## place where it is used
p_val <- 1
## method for the multiple-testing adjustment
adj <- "BH"
## contrasts:
The plot shows the interaction of sets between different variables depending on their presence. The dots in the UpSet plot specify if the criteria for presence are fulfilled: Presence is defined by a feature being measured in at least one sample of a set.
## png
## 2
Only continue with the shared features in the following analyses.
## [1] "mean: Cells_AFA"
## [1] 1.493834
## [1] "mean: Cells_TwoPhase_AFA"
## [1] 1.594873
##
## Wilcoxon signed rank test with continuity correction
##
## data: cv_a and cv_b
## V = 4226497, p-value = 0.003558
## alternative hypothesis: true location shift is not equal to 0
The TwoPhase method shows higher CV compared to the traditional AFA method.
Now run the GO enrichment tests. Use the raw (unadjusted) p-values and return the GO terms for the ontologies Biological Process (BP), Molecular Function (MF), and Cellular Component (CC). Perform a Fisher test with the significant features (\(p < \alpha = 0.05\)).
## [1] "## BP"
## [1] "## MF"
## [1] "## CC"
Take the t-values and plot against the GRAVY-Score and Isoelectric point
## t-statistics of the contrast (column "t" of `tT`)
t <- tT[, "t"]
rD <- rowData(cells)

## GRAVY score: one value per sequence of the features in `tT`
gravy <- rD[rownames(tT), "sequence"] %>%
  lapply(calculateGravyScore) %>%
  unlist()
df <- data.frame(gravy = gravy, t = t)
## scatter plot with a linear trend line and the Spearman correlation
ggplot(data = df, mapping = aes(x = gravy, y = t)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = lm, se = FALSE) +
  ggpubr::stat_cor(method = "spearman") +
  theme_classic()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing non-finite values (stat_cor).
## Warning: Removed 6 rows containing missing values (geom_point).
cor.test(gravy, t, method = "spearman")
## Warning in cor.test.default(gravy, t, method = "spearman"): Cannot compute exact
## p-value with ties
##
## Spearman's rank correlation rho
##
## data: gravy and t
## S = 1.5895e+10, p-value = 0.9366
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.00117734

## isoelectric point, same layout as for the GRAVY score
iep <- rD[rownames(tT), "sequence"] %>%
  lapply(calculateIsoelectricPoint, method = "IPC_protein") %>%
  unlist()
df <- data.frame(iep = iep, t = t)
ggplot(data = df, mapping = aes(x = iep, y = t)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = lm, se = FALSE) +
  ggpubr::stat_cor(method = "spearman") +
  theme_classic()
## `geom_smooth()` using formula 'y ~ x'
cor.test(iep, t, method = "spearman")
## Warning in cor.test.default(iep, t, method = "spearman"): Cannot compute exact
## p-value with ties
##
## Spearman's rank correlation rho
##
## data: iep and t
## S = 1.629e+10, p-value = 0.1363
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.02203543
Create violin plots for shared/unique features.
## ids of the features found exclusively with one extraction method, read
## once for both scores; as.character() guards against factor columns,
## which would index by their integer codes
feat_other <- as.character(read.table(
  file = "unique_features_cells_Cells_AFA.txt", header = TRUE)[, 1])
feat_tp <- as.character(read.table(
  file = "unique_features_cells_Cells_TwoPhase_AFA.txt", header = TRUE)[, 1])
seq_other <- rowData(cells_all)[feat_other, "sequence"]
seq_tp <- rowData(cells_all)[feat_tp, "sequence"]

## Combine the scores of the three feature sets into a long data.frame
## with the columns `score` (numeric) and `type` (factor). The scores stay
## numeric throughout and empty sets are allowed.
stack_scores <- function(shared, other, tp) {
  scores <- list(shared = unlist(shared), autoSP3 = unlist(other),
    "MTBE-SP3" = unlist(tp))
  data.frame(
    score = as.numeric(unlist(scores, use.names = FALSE)),
    type = factor(rep(names(scores), times = lengths(scores)),
      levels = c("autoSP3", "MTBE-SP3", "shared")))
}
comparisons <- list(c("autoSP3", "shared"), c("MTBE-SP3", "shared"),
  c("autoSP3", "MTBE-SP3"))

## gravy (`gravy` holds the scores of the shared features, computed above)
gravy_other <- lapply(seq_other, calculateGravyScore)
gravy_tp <- lapply(seq_tp, calculateGravyScore)
df <- stack_scores(gravy, gravy_other, gravy_tp)
plot_violin_barplot(df = df, x = "type", y = "score", fill = "type",
  comparisons = comparisons, file = "protein_violin_gravy_cells.pdf",
  method = "wilcox.test", paired = FALSE)
## Warning: Removed 6 rows containing non-finite values (stat_ydensity).
## Warning: Removed 6 rows containing non-finite values (stat_boxplot).
## Warning: Removed 6 rows containing non-finite values (stat_signif).
## Warning: Removed 6 rows containing non-finite values (stat_ydensity).
## Warning: Removed 6 rows containing non-finite values (stat_boxplot).
## Warning: Removed 6 rows containing non-finite values (stat_signif).

## iep; the shared values in `iep` were computed with method = "IPC_protein",
## so the same method is set explicitly here to keep the groups comparable
iep_other <- lapply(seq_other, calculateIsoelectricPoint,
  method = "IPC_protein")
iep_tp <- lapply(seq_tp, calculateIsoelectricPoint, method = "IPC_protein")
df <- stack_scores(iep, iep_other, iep_tp)
plot_violin_barplot(df = df, x = "type", y = "score", fill = "type",
  comparisons = comparisons, file = "protein_violin_iep_cells.pdf",
  method = "wilcox.test", paired = FALSE)
## contrasts:
Upset plot for all condition types.
## png
## 2
The plot shows the interaction of sets between different variables depending on their presence. The dots in the UpSet plot specify if the criteria for presence are fulfilled: Presence is defined by a feature being measured in at least one sample of a set.
Only continue with the shared features in the following analyses.
## [1] "mean: powder_AFA"
## [1] 1.67778
## [1] "mean: powder_TwoPhase_AFA"
## [1] 2.082931
##
## Wilcoxon signed rank test with continuity correction
##
## data: cv_a and cv_b
## V = 2409681, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
## Warning: Partial NA coefficients for 472 probe(s)
## [1] "## contrast: powder_AFA - powder_TwoPhase_AFA"
Now run the GO enrichment tests. Use the raw (unadjusted) p-values and return the GO terms for the ontologies Biological Process (BP), Molecular Function (MF), and Cellular Component (CC). Perform a Fisher test with the significant features (\(p < \alpha = 0.05\)).
## [1] "## BP"
## [1] "## MF"
## [1] "## CC"
Take the t-values and plot against the GRAVY-Score and Isoelectric point
## t-statistics of the contrast (column "t" of `tT`)
t <- tT[, "t"]
rD <- rowData(ff)

## GRAVY score: one value per sequence of the features in `tT`
gravy <- rD[rownames(tT), "sequence"] %>%
  lapply(calculateGravyScore) %>%
  unlist()
df <- data.frame(gravy = gravy, t = t)
## scatter plot with a linear trend line and the Spearman correlation
ggplot(data = df, mapping = aes(x = gravy, y = t)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = lm, se = FALSE) +
  ggpubr::stat_cor(method = "spearman") +
  theme_classic()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing non-finite values (stat_cor).
## Warning: Removed 12 rows containing missing values (geom_point).
cor.test(gravy, t, method = "spearman")
## Warning in cor.test.default(gravy, t, method = "spearman"): Cannot compute exact
## p-value with ties
##
## Spearman's rank correlation rho
##
## data: gravy and t
## S = 7789494545, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.3138748

## isoelectric point, same layout as for the GRAVY score
iep <- rD[rownames(tT), "sequence"] %>%
  lapply(calculateIsoelectricPoint, method = "IPC_protein") %>%
  unlist()
df <- data.frame(iep = iep, t = t)
ggplot(data = df, mapping = aes(x = iep, y = t)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = lm, se = FALSE) +
  ggpubr::stat_cor(method = "spearman") +
  theme_classic()
## `geom_smooth()` using formula 'y ~ x'
cor.test(iep, t, method = "spearman")
## Warning in cor.test.default(iep, t, method = "spearman"): Cannot compute exact
## p-value with ties
##
## Spearman's rank correlation rho
##
## data: iep and t
## S = 1.0298e+10, p-value = 9.888e-11
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.1008457
Create violin plots for shared/unique features.
## ids of the features found exclusively with one extraction method, read
## once for both scores; as.character() guards against factor columns,
## which would index by their integer codes
feat_other <- as.character(read.table(
  file = "unique_features_ff_contrast1_powder.txt", header = TRUE)[, 1])
feat_tp <- as.character(read.table(
  file = "unique_features_ff_contrast1_powder_TwoPhase.txt",
  header = TRUE)[, 1])
seq_other <- rowData(ff_1_all)[feat_other, "sequence"]
seq_tp <- rowData(ff_1_all)[feat_tp, "sequence"]

## Combine the scores of the three feature sets into a long data.frame
## with the columns `score` (numeric) and `type` (factor). The scores stay
## numeric throughout and empty sets are allowed.
stack_scores <- function(shared, other, tp) {
  scores <- list(shared = unlist(shared), autoSP3 = unlist(other),
    "MTBE-SP3" = unlist(tp))
  data.frame(
    score = as.numeric(unlist(scores, use.names = FALSE)),
    type = factor(rep(names(scores), times = lengths(scores)),
      levels = c("autoSP3", "MTBE-SP3", "shared")))
}
comparisons <- list(c("autoSP3", "shared"), c("MTBE-SP3", "shared"),
  c("autoSP3", "MTBE-SP3"))

## gravy (`gravy` holds the scores of the shared features, computed above)
gravy_other <- lapply(seq_other, calculateGravyScore)
gravy_tp <- lapply(seq_tp, calculateGravyScore)
df <- stack_scores(gravy, gravy_other, gravy_tp)
plot_violin_barplot(df = df, x = "type", y = "score", fill = "type",
  comparisons = comparisons, file = "protein_violin_gravy_freshfrozen_1.pdf",
  method = "wilcox.test", paired = FALSE)
## Warning: Removed 14 rows containing non-finite values (stat_ydensity).
## Warning: Removed 14 rows containing non-finite values (stat_boxplot).
## Warning: Removed 14 rows containing non-finite values (stat_signif).
## Warning: Removed 14 rows containing non-finite values (stat_ydensity).
## Warning: Removed 14 rows containing non-finite values (stat_boxplot).
## Warning: Removed 14 rows containing non-finite values (stat_signif).

## iep; the shared values in `iep` were computed with method = "IPC_protein",
## so the same method is set explicitly here to keep the groups comparable
iep_other <- lapply(seq_other, calculateIsoelectricPoint,
  method = "IPC_protein")
iep_tp <- lapply(seq_tp, calculateIsoelectricPoint, method = "IPC_protein")
df <- stack_scores(iep, iep_other, iep_tp)
plot_violin_barplot(df = df, x = "type", y = "score", fill = "type",
  comparisons = comparisons, file = "protein_violin_iep_freshfrozen_1.pdf",
  method = "wilcox.test", paired = FALSE)
## The plot shows the interaction of sets between different variables depending on their presence. The dots in the UpSet plot specify if the criteria for presence are fulfilled: Presence is defined by a feature being measured in at least one sample of a set.
Only continue with the shared features in the following analyses.
## [1] "mean: Tissue_bulk_AFA"
## [1] 2.394138
## [1] "mean: powder_TwoPhase_AFA"
## [1] 2.05866
##
## Wilcoxon signed rank test with continuity correction
##
## data: cv_a and cv_b
## V = 3740512, p-value = 4.703e-11
## alternative hypothesis: true location shift is not equal to 0
## [1] "## contrast: Tissue_bulk_AFA - powder_TwoPhase_AFA"
Now run the GO enrichment tests. Use the raw (unadjusted) p-values and return the GO terms for the ontologies Biological Process (BP), Molecular Function (MF), and Cellular Component (CC). Perform a Fisher test with the significant features (\(p < \alpha = 0.05\)).
## [1] "## BP"
## [1] "## MF"
## [1] "## CC"
Take the t-values and plot against the GRAVY-Score and Isoelectric point
## t-statistics of the contrast (column "t" of `tT`)
t <- tT[, "t"]
rD <- rowData(ff)

## GRAVY score: one value per sequence of the features in `tT`
gravy <- rD[rownames(tT), "sequence"] %>%
  lapply(calculateGravyScore) %>%
  unlist()
df <- data.frame(gravy = gravy, t = t)
## scatter plot with a linear trend line and the Spearman correlation
ggplot(data = df, mapping = aes(x = gravy, y = t)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = lm, se = FALSE) +
  ggpubr::stat_cor(method = "spearman") +
  theme_classic()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 10 rows containing non-finite values (stat_cor).
## Warning: Removed 10 rows containing missing values (geom_point).
cor.test(gravy, t, method = "spearman")
## Warning in cor.test.default(gravy, t, method = "spearman"): Cannot compute exact
## p-value with ties
##
## Spearman's rank correlation rho
##
## data: gravy and t
## S = 7986424933, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.2711298

## isoelectric point, same layout as for the GRAVY score
iep <- rD[rownames(tT), "sequence"] %>%
  lapply(calculateIsoelectricPoint, method = "IPC_protein") %>%
  unlist()
df <- data.frame(iep = iep, t = t)
ggplot(data = df, mapping = aes(x = iep, y = t)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = lm, se = FALSE) +
  ggpubr::stat_cor(method = "spearman") +
  theme_classic()
## `geom_smooth()` using formula 'y ~ x'
cor.test(iep, t, method = "spearman")
## Warning in cor.test.default(iep, t, method = "spearman"): Cannot compute exact
## p-value with ties
##
## Spearman's rank correlation rho
##
## data: iep and t
## S = 9982874054, p-value = 1.079e-09
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.09566522
Create violin plots for shared/unique features.
## ids of the features found exclusively with one extraction method, read
## once for both scores; as.character() guards against factor columns,
## which would index by their integer codes
feat_other <- as.character(read.table(
  file = "unique_features_ff_contrast2_Tissue_bulk.txt", header = TRUE)[, 1])
feat_tp <- as.character(read.table(
  file = "unique_features_ff_contrast2_powder_TwoPhase.txt",
  header = TRUE)[, 1])
seq_other <- rowData(ff_2_all)[feat_other, "sequence"]
seq_tp <- rowData(ff_2_all)[feat_tp, "sequence"]

## Combine the scores of the three feature sets into a long data.frame
## with the columns `score` (numeric) and `type` (factor). The scores stay
## numeric throughout and empty sets are allowed.
stack_scores <- function(shared, other, tp) {
  scores <- list(shared = unlist(shared), autoSP3 = unlist(other),
    "MTBE-SP3" = unlist(tp))
  data.frame(
    score = as.numeric(unlist(scores, use.names = FALSE)),
    type = factor(rep(names(scores), times = lengths(scores)),
      levels = c("autoSP3", "MTBE-SP3", "shared")))
}
comparisons <- list(c("autoSP3", "shared"), c("MTBE-SP3", "shared"),
  c("autoSP3", "MTBE-SP3"))

## gravy (`gravy` holds the scores of the shared features, computed above)
gravy_other <- lapply(seq_other, calculateGravyScore)
gravy_tp <- lapply(seq_tp, calculateGravyScore)
df <- stack_scores(gravy, gravy_other, gravy_tp)
plot_violin_barplot(df = df, x = "type", y = "score", fill = "type",
  comparisons = comparisons, file = "protein_violin_gravy_freshfrozen_2.pdf",
  method = "wilcox.test", paired = FALSE)
## Warning: Removed 14 rows containing non-finite values (stat_ydensity).
## Warning: Removed 14 rows containing non-finite values (stat_boxplot).
## Warning: Removed 14 rows containing non-finite values (stat_signif).
## Warning: Removed 14 rows containing non-finite values (stat_ydensity).
## Warning: Removed 14 rows containing non-finite values (stat_boxplot).
## Warning: Removed 14 rows containing non-finite values (stat_signif).

## iep; the shared values in `iep` were computed with method = "IPC_protein",
## so the same method is set explicitly here to keep the groups comparable
iep_other <- lapply(seq_other, calculateIsoelectricPoint,
  method = "IPC_protein")
iep_tp <- lapply(seq_tp, calculateIsoelectricPoint, method = "IPC_protein")
df <- stack_scores(iep, iep_other, iep_tp)
plot_violin_barplot(df = df, x = "type", y = "score", fill = "type",
  comparisons = comparisons, file = "protein_violin_iep_freshfrozen_2.pdf",
  method = "wilcox.test", paired = FALSE)
## contrasts:
Upset plot for all condition types.
## png
## 2
The plot shows the interaction of sets between different variables depending on their presence. The dots in the UpSet plot specify if the criteria for presence are fulfilled: Presence is defined by a feature being measured in at least one sample of a set.
Only continue with the shared features in the following analyses.
## [1] "mean: powder_AFA"
## [1] 3.338009
## [1] "mean: powder_TwoPhase_AFA"
## [1] 2.251171
##
## Wilcoxon signed rank test with continuity correction
##
## data: cv_a and cv_b
## V = 3069548, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
## Warning: Partial NA coefficients for 561 probe(s)
## [1] "## contrast: powder_AFA - powder_TwoPhase_AFA"
Now run the GO enrichment tests. Use the raw (unadjusted) p-values and return the GO terms for the ontologies Biological Process (BP), Molecular Function (MF), and Cellular Component (CC). Perform a Fisher test with the significant features (\(p < \alpha = 0.05\)).
## [1] "## BP"
## [1] "## MF"
## [1] "## CC"
Take the t-values and plot against the GRAVY-Score and Isoelectric point
## t-statistics of the contrast (column "t" of `tT`)
t <- tT[, "t"]
rD <- rowData(ffpe)

## GRAVY score: one value per sequence of the features in `tT`
gravy <- rD[rownames(tT), "sequence"] %>%
  lapply(calculateGravyScore) %>%
  unlist()
df <- data.frame(gravy = gravy, t = t)
## scatter plot with a linear trend line and the Spearman correlation
ggplot(data = df, mapping = aes(x = gravy, y = t)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = lm, se = FALSE) +
  ggpubr::stat_cor(method = "spearman") +
  theme_classic()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing non-finite values (stat_cor).
## Warning: Removed 6 rows containing missing values (geom_point).
cor.test(gravy, t, method = "spearman")
## Warning in cor.test.default(gravy, t, method = "spearman"): Cannot compute exact
## p-value with ties
##
## Spearman's rank correlation rho
##
## data: gravy and t
## S = 5925479107, p-value = 0.02808
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.03805362

## isoelectric point, same layout as for the GRAVY score
iep <- rD[rownames(tT), "sequence"] %>%
  lapply(calculateIsoelectricPoint, method = "IPC_protein") %>%
  unlist()
df <- data.frame(iep = iep, t = t)
ggplot(data = df, mapping = aes(x = iep, y = t)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = lm, se = FALSE) +
  ggpubr::stat_cor(method = "spearman") +
  theme_classic()
## `geom_smooth()` using formula 'y ~ x'
cor.test(iep, t, method = "spearman")
## Warning in cor.test.default(iep, t, method = "spearman"): Cannot compute exact
## p-value with ties
##
## Spearman's rank correlation rho
##
## data: iep and t
## S = 6279457713, p-value = 0.4214
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.01392261
Create violin plots for shared/unique features.
## ids of the features found exclusively with one extraction method, read
## once for both scores; as.character() guards against factor columns,
## which would index by their integer codes
feat_other <- as.character(read.table(
  file = "unique_features_ffpe_contrast1_powder.txt", header = TRUE)[, 1])
feat_tp <- as.character(read.table(
  file = "unique_features_ffpe_contrast1_TwoPhase.txt", header = TRUE)[, 1])
seq_other <- rowData(ffpe_1_all)[feat_other, "sequence"]
seq_tp <- rowData(ffpe_1_all)[feat_tp, "sequence"]

## Combine the scores of the three feature sets into a long data.frame
## with the columns `score` (numeric) and `type` (factor). The scores stay
## numeric throughout and empty sets are allowed.
stack_scores <- function(shared, other, tp) {
  scores <- list(shared = unlist(shared), autoSP3 = unlist(other),
    "MTBE-SP3" = unlist(tp))
  data.frame(
    score = as.numeric(unlist(scores, use.names = FALSE)),
    type = factor(rep(names(scores), times = lengths(scores)),
      levels = c("autoSP3", "MTBE-SP3", "shared")))
}
comparisons <- list(c("autoSP3", "shared"), c("MTBE-SP3", "shared"),
  c("autoSP3", "MTBE-SP3"))

## gravy (`gravy` holds the scores of the shared features, computed above)
gravy_other <- lapply(seq_other, calculateGravyScore)
gravy_tp <- lapply(seq_tp, calculateGravyScore)
df <- stack_scores(gravy, gravy_other, gravy_tp)
plot_violin_barplot(df = df, x = "type", y = "score", fill = "type",
  comparisons = comparisons, file = "protein_violin_gravy_FFPE_1.pdf",
  method = "wilcox.test", paired = FALSE)
## Warning: Removed 7 rows containing non-finite values (stat_ydensity).
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
## Warning: Removed 7 rows containing non-finite values (stat_signif).
## Warning: Removed 7 rows containing non-finite values (stat_ydensity).
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
## Warning: Removed 7 rows containing non-finite values (stat_signif).

## iep; the shared values in `iep` were computed with method = "IPC_protein",
## so the same method is set explicitly here to keep the groups comparable
iep_other <- lapply(seq_other, calculateIsoelectricPoint,
  method = "IPC_protein")
iep_tp <- lapply(seq_tp, calculateIsoelectricPoint, method = "IPC_protein")
df <- stack_scores(iep, iep_other, iep_tp)
plot_violin_barplot(df = df, x = "type", y = "score", fill = "type",
  comparisons = comparisons, file = "protein_violin_iep_FFPE_1.pdf",
  method = "wilcox.test", paired = FALSE)
## The plot shows the interaction of sets between different variables depending on their presence. The dots in the UpSet plot specify if the criteria for presence are fulfilled: Presence is defined by a feature being measured in at least one sample of a set.
Only continue with the shared features in the following analyses.
## [1] "mean: FFPE_AFA"
## [1] 3.115173
## [1] "mean: powder_TwoPhase_AFA"
## [1] 2.295979
##
## Wilcoxon signed rank test with continuity correction
##
## data: cv_a and cv_b
## V = 3486655, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
## [1] "## contrast: FFPE_AFA - powder_TwoPhase_AFA"
Now run the GO enrichment tests. Use the raw (unadjusted) p-values and return the GO terms for the ontologies Biological Process (BP), Molecular Function (MF), and Cellular Component (CC). Perform a Fisher test with the significant features (\(p < \alpha = 0.05\)).
## [1] "## BP"
## [1] "## MF"
## [1] "## CC"
Take the t-values and plot against the GRAVY-Score and Isoelectric point
## t-statistics of the contrast (column "t" of `tT`)
t <- tT[, "t"]
rD <- rowData(ffpe)

## GRAVY score: one value per sequence of the features in `tT`
gravy <- rD[rownames(tT), "sequence"] %>%
  lapply(calculateGravyScore) %>%
  unlist()
df <- data.frame(gravy = gravy, t = t)
## scatter plot with a linear trend line and the Spearman correlation
ggplot(data = df, mapping = aes(x = gravy, y = t)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = lm, se = FALSE) +
  ggpubr::stat_cor(method = "spearman") +
  theme_classic()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 7 rows containing non-finite values (stat_smooth).
## Warning: Removed 7 rows containing non-finite values (stat_cor).
## Warning: Removed 7 rows containing missing values (geom_point).
cor.test(gravy, t, method = "spearman")
## Warning in cor.test.default(gravy, t, method = "spearman"): Cannot compute exact
## p-value with ties
##
## Spearman's rank correlation rho
##
## data: gravy and t
## S = 5145845681, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.2648622

## isoelectric point, same layout as for the GRAVY score
iep <- rD[rownames(tT), "sequence"] %>%
  lapply(calculateIsoelectricPoint, method = "IPC_protein") %>%
  unlist()
df <- data.frame(iep = iep, t = t)
ggplot(data = df, mapping = aes(x = iep, y = t)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = lm, se = FALSE) +
  ggpubr::stat_cor(method = "spearman") +
  theme_classic()
## `geom_smooth()` using formula 'y ~ x'
cor.test(iep, t, method = "spearman")
## Warning in cor.test.default(iep, t, method = "spearman"): Cannot compute exact
## p-value with ties
##
## Spearman's rank correlation rho
##
## data: iep and t
## S = 7100055161, p-value = 0.628
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.008213679
Create violin plots for shared/unique features.
## ids of the features found exclusively with one extraction method, read
## once for both scores; as.character() guards against factor columns,
## which would index by their integer codes
feat_other <- as.character(read.table(
  file = "unique_features_ffpe_contrast2_FFPE.txt", header = TRUE)[, 1])
feat_tp <- as.character(read.table(
  file = "unique_features_ffpe_contrast2_powder_TwoPhase.txt",
  header = TRUE)[, 1])
seq_other <- rowData(ffpe_2_all)[feat_other, "sequence"]
seq_tp <- rowData(ffpe_2_all)[feat_tp, "sequence"]

## Combine the scores of the three feature sets into a long data.frame
## with the columns `score` (numeric) and `type` (factor). The scores stay
## numeric throughout and empty sets are allowed.
stack_scores <- function(shared, other, tp) {
  scores <- list(shared = unlist(shared), autoSP3 = unlist(other),
    "MTBE-SP3" = unlist(tp))
  data.frame(
    score = as.numeric(unlist(scores, use.names = FALSE)),
    type = factor(rep(names(scores), times = lengths(scores)),
      levels = c("autoSP3", "MTBE-SP3", "shared")))
}
comparisons <- list(c("autoSP3", "shared"), c("MTBE-SP3", "shared"),
  c("autoSP3", "MTBE-SP3"))

## gravy (`gravy` holds the scores of the shared features, computed above)
gravy_other <- lapply(seq_other, calculateGravyScore)
gravy_tp <- lapply(seq_tp, calculateGravyScore)
df <- stack_scores(gravy, gravy_other, gravy_tp)
plot_violin_barplot(df = df, x = "type", y = "score", fill = "type",
  comparisons = comparisons, file = "protein_violin_gravy_FFPE_2.pdf",
  method = "wilcox.test", paired = FALSE)
## Warning: Removed 7 rows containing non-finite values (stat_ydensity).
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
## Warning: Removed 7 rows containing non-finite values (stat_signif).
## Warning: Removed 7 rows containing non-finite values (stat_ydensity).
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
## Warning: Removed 7 rows containing non-finite values (stat_signif).

## iep; the shared values in `iep` were computed with method = "IPC_protein",
## so the same method is set explicitly here to keep the groups comparable
iep_other <- lapply(seq_other, calculateIsoelectricPoint,
  method = "IPC_protein")
iep_tp <- lapply(seq_tp, calculateIsoelectricPoint, method = "IPC_protein")
df <- stack_scores(iep, iep_other, iep_tp)
plot_violin_barplot(df = df, x = "type", y = "score", fill = "type",
  comparisons = comparisons, file = "protein_violin_iep_FFPE_2.pdf",
  method = "wilcox.test", paired = FALSE)
## contrasts:
Upset plot for all condition types.
## png
## 2
The plot shows the interaction of sets between different variables depending on their presence. The dots in the UpSet plot specify if the criteria for presence are fulfilled: Presence is defined by a feature being measured in at least one sample of a set.
Only continue with the shared features in the following analyses.
## [1] "mean: Plasma_AFA"
## [1] 2.324831
## [1] "mean: Plasma_TwoPhase_AFA"
## [1] 1.637527
##
## Wilcoxon signed rank test with continuity correction
##
## data: cv_a and cv_b
## V = 19615, p-value = 6.259e-09
## alternative hypothesis: true location shift is not equal to 0
## [1] "## contrast: plasma_AFA - plasma_TwoPhase_AFA"
Now run the GO enrichment tests. Use the raw (unadjusted) p-values and return the GO terms for the ontologies Biological Process (BP), Molecular Function (MF), and Cellular Component (CC). Perform a Fisher test with the significant features (\(p < \alpha = 0.05\)).
## [1] "## BP"
## [1] "## MF"
## [1] "## CC"
Plot the t-values against the GRAVY score and the isoelectric point.
## t-values: column "t" of tT
t <- tT[, "t"]
## GRAVY score
## one score per row of tT, calculated from the sequence stored in the row
## data of the plasma data set
rD <- rowData(plasma)
gravy <- unlist(lapply(rD[rownames(tT), "sequence"], calculateGravyScore))
df <- data.frame(gravy = gravy, t = t)
## scatter plot with linear fit and Spearman correlation
ggplot(data = df, mapping = aes(x = gravy, y = t)) +
    geom_point(alpha = 0.3) +
    geom_smooth(method = "lm", se = FALSE) +
    ggpubr::stat_cor(method = "spearman") +
    theme_classic()## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing non-finite values (stat_cor).
## Warning: Removed 3 rows containing missing values (geom_point).
cor.test(gravy, t, method = "spearman")## Warning in cor.test.default(gravy, t, method = "spearman"): Cannot compute exact
## p-value with ties
##
## Spearman's rank correlation rho
##
## data: gravy and t
## S = 2674532, p-value = 0.7424
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.02072509
## isoelectric point
## one value per row of tT, calculated with the "IPC_protein" method from the
## sequence stored in rD
iep <- unlist(lapply(rD[rownames(tT), "sequence"],
    calculateIsoelectricPoint, method = "IPC_protein"))
df <- data.frame(iep = iep, t = t)
## scatter plot with linear fit and Spearman correlation
ggplot(data = df, mapping = aes(x = iep, y = t)) +
    geom_point(alpha = 0.3) +
    geom_smooth(method = "lm", se = FALSE) +
    ggpubr::stat_cor(method = "spearman") +
    theme_classic()## `geom_smooth()` using formula 'y ~ x'
cor.test(iep, t, method = "spearman")## Warning in cor.test.default(iep, t, method = "spearman"): Cannot compute exact
## p-value with ties
##
## Spearman's rank correlation rho
##
## data: iep and t
## S = 2523773, p-value = 0.08425
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.10791
Create violin plots for shared/unique features.
## gravy
## GRAVY scores of the features listed in the two "unique features" files of
## the plasma data set; sequences are looked up in rowData(plasma_all).
feat <- read.table(file = "unique_features_plasma_Plasma_AFA.txt",
    header = TRUE)
gravy_other <- lapply(rowData(plasma_all)[feat[, 1], "sequence"],
    function(aa) calculateGravyScore(aa))
feat <- read.table(file = "unique_features_plasma_Plasma_TwoPhase.txt",
    header = TRUE)
gravy_tp <- lapply(rowData(plasma_all)[feat[, 1], "sequence"],
    function(aa) calculateGravyScore(aa))
## create df
## Build the columns directly instead of cbind()-ing numbers with labels:
## the character matrix round trip rounds the scores and breaks rbind() when
## one of the groups is empty.
score_groups <- list("shared" = unlist(gravy),
    "autoSP3" = unlist(gravy_other), "MTBE-SP3" = unlist(gravy_tp))
df <- data.frame(
    score = as.numeric(unlist(score_groups, use.names = FALSE)),
    type = factor(rep(names(score_groups), times = lengths(score_groups)),
        levels = c("autoSP3", "MTBE-SP3", "shared")))
## pairwise group comparisons tested in the plot
comparisons <- list(c("autoSP3", "shared"), c("MTBE-SP3", "shared"),
    c("autoSP3", "MTBE-SP3"))
plot_violin_barplot(df = df, x = "type", y = "score", fill = "type",
    comparisons = comparisons, file = "protein_violin_gravy_plasma.pdf",
    method = "wilcox.test", paired = FALSE)## Warning: Removed 3 rows containing non-finite values (stat_ydensity).
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).
## Warning: Removed 3 rows containing non-finite values (stat_signif).
## Warning: Removed 3 rows containing non-finite values (stat_ydensity).
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).
## Warning: Removed 3 rows containing non-finite values (stat_signif).
## iep
## Isoelectric points of the features listed in the two "unique features"
## files of the plasma data set; sequences are looked up in
## rowData(plasma_all). The method is set to "IPC_protein" explicitly so that
## these values are calculated the same way as the shared `iep` values.
feat <- read.table(file = "unique_features_plasma_Plasma_AFA.txt",
    header = TRUE)
iep_other <- lapply(rowData(plasma_all)[feat[, 1], "sequence"],
    function(aa) calculateIsoelectricPoint(aa, method = "IPC_protein"))
feat <- read.table(file = "unique_features_plasma_Plasma_TwoPhase.txt",
    header = TRUE)
iep_tp <- lapply(rowData(plasma_all)[feat[, 1], "sequence"],
    function(aa) calculateIsoelectricPoint(aa, method = "IPC_protein"))
## create df
## Build the columns directly instead of cbind()-ing numbers with labels:
## the character matrix round trip rounds the scores and breaks rbind() when
## one of the groups is empty.
score_groups <- list("shared" = unlist(iep), "autoSP3" = unlist(iep_other),
    "MTBE-SP3" = unlist(iep_tp))
df <- data.frame(
    score = as.numeric(unlist(score_groups, use.names = FALSE)),
    type = factor(rep(names(score_groups), times = lengths(score_groups)),
        levels = c("autoSP3", "MTBE-SP3", "shared")))
## pairwise group comparisons tested in the plot
comparisons <- list(c("autoSP3", "shared"), c("MTBE-SP3", "shared"),
    c("autoSP3", "MTBE-SP3"))
plot_violin_barplot(df = df, x = "type", y = "score", fill = "type",
    comparisons = comparisons, file = "protein_violin_iep_plasma.pdf",
    method = "wilcox.test", paired = FALSE)
contrasts:
Upset plot for all condition types.
## png
## 2
The plot shows the interaction of sets between different variables depending on their presence. The dots in the UpSet plot specify if the criteria for presence are fulfilled: Presence is defined by a feature being measured in at least one sample of a set.
Only continue with the shared features in the following analyses.
## [1] "mean: Serum_AFA"
## [1] 1.945501
## [1] "mean: Serum_TwoPhase_AFA"
## [1] 1.94307
##
## Wilcoxon signed rank test with continuity correction
##
## data: cv_a and cv_b
## V = 12006, p-value = 0.3543
## alternative hypothesis: true location shift is not equal to 0
## [1] "## contrast: Serum_AFA - Serum_TwoPhase_AFA"
Now run the GO enrichment tests. Use the raw (unadjusted) values and return the GO terms for the ontologies Biological Process (BP), Molecular Function (MF) and Cellular Component (CC). Perform a Fisher test with the significant features (significance level \(\alpha\) = 0.05).
## [1] "## BP"
## [1] "## MF"
## [1] "## CC"
Plot the t-values against the GRAVY score and the isoelectric point.
## t-values: column "t" of tT
t <- tT[, "t"]
## GRAVY score
## one score per row of tT, calculated from the sequence stored in the row
## data of the serum data set
rD <- rowData(serum)
gravy <- unlist(lapply(rD[rownames(tT), "sequence"], calculateGravyScore))
df <- data.frame(gravy = gravy, t = t)
## scatter plot with linear fit and Spearman correlation
ggplot(data = df, mapping = aes(x = gravy, y = t)) +
    geom_point(alpha = 0.3) +
    geom_smooth(method = "lm", se = FALSE) +
    ggpubr::stat_cor(method = "spearman") +
    theme_classic()## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing non-finite values (stat_cor).
## Warning: Removed 3 rows containing missing values (geom_point).
cor.test(gravy, t, method = "spearman")## Warning in cor.test.default(gravy, t, method = "spearman"): Cannot compute exact
## p-value with ties
##
## Spearman's rank correlation rho
##
## data: gravy and t
## S = 2201955, p-value = 0.9083
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.007518585
## isoelectric point
## one value per row of tT, calculated with the "IPC_protein" method from the
## sequence stored in rD
iep <- unlist(lapply(rD[rownames(tT), "sequence"],
    calculateIsoelectricPoint, method = "IPC_protein"))
df <- data.frame(iep = iep, t = t)
## scatter plot with linear fit and Spearman correlation
ggplot(data = df, mapping = aes(x = iep, y = t)) +
    geom_point(alpha = 0.3) +
    geom_smooth(method = "lm", se = FALSE) +
    ggpubr::stat_cor(method = "spearman") +
    theme_classic()## `geom_smooth()` using formula 'y ~ x'
cor.test(iep, t, method = "spearman")## Warning in cor.test.default(iep, t, method = "spearman"): Cannot compute exact
## p-value with ties
##
## Spearman's rank correlation rho
##
## data: iep and t
## S = 2268889, p-value = 0.8145
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.01522211
Create violin plots for shared/unique features.
## gravy
## GRAVY scores of the features listed in the two "unique features" files of
## the serum data set; sequences are looked up in rowData(serum_all).
feat <- read.table(file = "unique_features_serum_Serum_AFA.txt",
    header = TRUE)
gravy_other <- lapply(rowData(serum_all)[feat[, 1], "sequence"],
    function(aa) calculateGravyScore(aa))
feat <- read.table(file = "unique_features_serum_Serum_TwoPhase.txt",
    header = TRUE)
gravy_tp <- lapply(rowData(serum_all)[feat[, 1], "sequence"],
    function(aa) calculateGravyScore(aa))
## create df
## Build the columns directly instead of cbind()-ing numbers with labels:
## the character matrix round trip rounds the scores and breaks rbind() when
## one of the groups is empty.
score_groups <- list("shared" = unlist(gravy),
    "autoSP3" = unlist(gravy_other), "MTBE-SP3" = unlist(gravy_tp))
df <- data.frame(
    score = as.numeric(unlist(score_groups, use.names = FALSE)),
    type = factor(rep(names(score_groups), times = lengths(score_groups)),
        levels = c("autoSP3", "MTBE-SP3", "shared")))
## pairwise group comparisons tested in the plot
comparisons <- list(c("autoSP3", "shared"), c("MTBE-SP3", "shared"),
    c("autoSP3", "MTBE-SP3"))
plot_violin_barplot(df = df, x = "type", y = "score", fill = "type",
    comparisons = comparisons, file = "protein_violin_gravy_serum.pdf",
    method = "wilcox.test", paired = FALSE)## Warning: Removed 3 rows containing non-finite values (stat_ydensity).
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).
## Warning: Removed 3 rows containing non-finite values (stat_signif).
## Warning: Removed 3 rows containing non-finite values (stat_ydensity).
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).
## Warning: Removed 3 rows containing non-finite values (stat_signif).
## iep
## Isoelectric points of the features listed in the two "unique features"
## files of the serum data set; sequences are looked up in
## rowData(serum_all). The method is set to "IPC_protein" explicitly so that
## these values are calculated the same way as the shared `iep` values.
feat <- read.table(file = "unique_features_serum_Serum_AFA.txt",
    header = TRUE)
iep_other <- lapply(rowData(serum_all)[feat[, 1], "sequence"],
    function(aa) calculateIsoelectricPoint(aa, method = "IPC_protein"))
feat <- read.table(file = "unique_features_serum_Serum_TwoPhase.txt",
    header = TRUE)
iep_tp <- lapply(rowData(serum_all)[feat[, 1], "sequence"],
    function(aa) calculateIsoelectricPoint(aa, method = "IPC_protein"))
## create df
## Build the columns directly instead of cbind()-ing numbers with labels:
## the character matrix round trip rounds the scores and breaks rbind() when
## one of the groups is empty.
score_groups <- list("shared" = unlist(iep), "autoSP3" = unlist(iep_other),
    "MTBE-SP3" = unlist(iep_tp))
df <- data.frame(
    score = as.numeric(unlist(score_groups, use.names = FALSE)),
    type = factor(rep(names(score_groups), times = lengths(score_groups)),
        levels = c("autoSP3", "MTBE-SP3", "shared")))
## pairwise group comparisons tested in the plot
comparisons <- list(c("autoSP3", "shared"), c("MTBE-SP3", "shared"),
    c("autoSP3", "MTBE-SP3"))
plot_violin_barplot(df = df, x = "type", y = "score", fill = "type",
    comparisons = comparisons, file = "protein_violin_iep_serum.pdf",
    method = "wilcox.test", paired = FALSE)
## assay values of the Cells_AFA samples, restricted to feat_cells
cv_cells_a <- assay(cells)[feat_cells, cells$condition == "Cells_AFA"]
## helper: restrict the assay of `se` to `features` and to the samples whose
## condition equals `condition`, then pass the transposed values to
## MatrixQCvis::cv and return the first element of its result
.cv_for_condition <- function(se, features, condition) {
    values <- assay(se)[features, se$condition == condition]
    MatrixQCvis::cv(t(values))[[1]]
}
## calculate cvs
## cells (cv_cells_a already holds the assay values of the Cells_AFA samples)
cv_cells_a <- MatrixQCvis::cv(t(cv_cells_a))[[1]]
cv_cells_b <- .cv_for_condition(cells, feat_cells, "Cells_TwoPhase_AFA")
## fresh-frozen
cv_ff_a <- .cv_for_condition(ff, feat_ff, "powder_AFA")
cv_ff_b <- .cv_for_condition(ff, feat_ff, "powder_TwoPhase_AFA")
cv_ff_c <- .cv_for_condition(ff, feat_ff, "Tissue_bulk_AFA")
## FFPE
cv_ffpe_a <- .cv_for_condition(ffpe, feat_ffpe, "powder_AFA")
cv_ffpe_b <- .cv_for_condition(ffpe, feat_ffpe, "powder_TwoPhase_AFA")
cv_ffpe_c <- .cv_for_condition(ffpe, feat_ffpe, "FFPE_AFA")
## plasma
cv_plasma_a <- .cv_for_condition(plasma, feat_plasma, "Plasma_TwoPhase_AFA")
cv_plasma_b <- .cv_for_condition(plasma, feat_plasma, "Plasma_AFA")
## serum
cv_serum_a <- .cv_for_condition(serum, feat_serum, "Serum_TwoPhase_AFA")
cv_serum_b <- .cv_for_condition(serum, feat_serum, "Serum_AFA")
## create data frames
## One data frame per condition with the columns protein (names of the cv
## vector), cv, condition and experiment. The condition labels match the
## condition values used to select the samples.
cv_cells_a <- data.frame(protein = names(cv_cells_a), cv = cv_cells_a,
    condition = "Cells_AFA", experiment = "cells")
cv_cells_b <- data.frame(protein = names(cv_cells_b), cv = cv_cells_b,
    condition = "Cells_TwoPhase_AFA", experiment = "cells")
cv_ff_a <- data.frame(protein = names(cv_ff_a), cv = cv_ff_a,
    condition = "powder_AFA", experiment = "fresh-frozen")
cv_ff_b <- data.frame(protein = names(cv_ff_b), cv = cv_ff_b,
    condition = "powder_TwoPhase_AFA", experiment = "fresh-frozen")
cv_ff_c <- data.frame(protein = names(cv_ff_c), cv = cv_ff_c,
    condition = "Tissue_bulk_AFA", experiment = "fresh-frozen")
cv_ffpe_a <- data.frame(protein = names(cv_ffpe_a), cv = cv_ffpe_a,
    condition = "powder_AFA", experiment = "FFPE")
cv_ffpe_b <- data.frame(protein = names(cv_ffpe_b), cv = cv_ffpe_b,
    condition = "powder_TwoPhase_AFA", experiment = "FFPE")
cv_ffpe_c <- data.frame(protein = names(cv_ffpe_c), cv = cv_ffpe_c,
    condition = "FFPE_AFA", experiment = "FFPE")
cv_plasma_a <- data.frame(protein = names(cv_plasma_a), cv = cv_plasma_a,
    condition = "Plasma_TwoPhase_AFA", experiment = "plasma")
## label fixed: was "Plasma__AFA" (double underscore), which does not match
## the condition "Plasma_AFA" of these samples
cv_plasma_b <- data.frame(protein = names(cv_plasma_b), cv = cv_plasma_b,
    condition = "Plasma_AFA", experiment = "plasma")
cv_serum_a <- data.frame(protein = names(cv_serum_a), cv = cv_serum_a,
    condition = "Serum_TwoPhase_AFA", experiment = "serum")
cv_serum_b <- data.frame(protein = names(cv_serum_b), cv = cv_serum_b,
    condition = "Serum_AFA", experiment = "serum")
## rbind data frames
## stack the per-condition tables into one long data frame
cv_df <- rbind(
    cv_cells_a, cv_cells_b,
    cv_ff_a, cv_ff_b, cv_ff_c,
    cv_ffpe_a, cv_ffpe_b, cv_ffpe_c,
    cv_plasma_a, cv_plasma_b,
    cv_serum_a, cv_serum_b)
## conditions as factor with alphabetically sorted levels
cv_df[["condition"]] <- factor(cv_df[["condition"]],
    levels = sort(unique(cv_df[["condition"]])))
## boxplot of the cv values per condition, one panel per experiment
g <- ggplot(cv_df, aes(x = condition, y = cv)) +
    geom_boxplot() +
    facet_wrap(~ experiment, scales = "free_x", nrow = 1) +
    theme_classic() +
    labs(x = "", y = "coefficient of variation") +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
g## Warning: Removed 1713 rows containing non-finite values (stat_boxplot).
ggsave(filename = "coefficient_variation_protein_boxplot.pdf", plot = g)## Saving 7 x 5 in image
## Warning: Removed 1713 rows containing non-finite values (stat_boxplot).
## violin plot of the cv values per condition, one panel per experiment
g <- ggplot(cv_df, aes(x = condition, y = cv)) +
    geom_violin() +
    facet_wrap(~ experiment, scales = "free_x", nrow = 1) +
    theme_classic() +
    labs(x = "", y = "coefficient of variation") +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
g## Warning: Removed 1713 rows containing non-finite values (stat_ydensity).
ggsave(filename = "coefficient_variation_protein_violinplot.pdf", plot = g)## Saving 7 x 5 in image
## Warning: Removed 1713 rows containing non-finite values (stat_ydensity).
## half violin (nudged to the side) combined with a narrow boxplot of the cv
## values per condition, filled by experiment, one panel per experiment;
## aes() replaces the deprecated aes_string()
g <- ggplot(data = cv_df, aes(x = condition, y = cv, fill = experiment)) +
    geom_flat_violin(position = position_nudge(x = .2, y = 0), alpha = .8,
        scale = "count") +
    theme_classic() +
    guides(color = "none") +
    scale_fill_manual(values = as.character(wes_palette("Darjeeling1",
        type = "discrete"))) +
    ## outliers are not drawn in the boxplot layer (outlier.shape = NA)
    geom_boxplot(width = .1, show.legend = FALSE, outlier.shape = NA,
        alpha = 0.5) +
    xlab("") + ylab("coefficient of variation") +
    facet_wrap(~ experiment, scales = "free_x", nrow = 1) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
g## Warning: Removed 1713 rows containing non-finite values (stat_ydensity).
## Removed 1713 rows containing non-finite values (stat_boxplot).
ggsave(filename = "coefficient_variation_protein_violinboxplot.pdf", plot = g)## Saving 7 x 5 in image
## Warning: Removed 1713 rows containing non-finite values (stat_ydensity).
## Removed 1713 rows containing non-finite values (stat_boxplot).
## Saving 7 x 5 in image
European Molecular Biology Laboratory, Meyerhofstrasse 1, 69117 Heidelberg, Germany↩︎